5  Subgroup analysis

Workflow to establish a subgroup analysis after multiple imputation and propensity Score matching

Author

Janick Weberpals, RPh, PhD

Published

September 19, 2024

This is a reproducible example on how to establish a workflow to run a subgroup analysis after multiple imputation and propensity Score matching.

Load packages:

library(dplyr)
library(survival)
library(mice)
library(MatchThem)
library(survey)
library(here)
library(gtsummary)
library(parallelly)
library(ranger)
library(furrr)

source(here("functions", "source_encore.io_functions.R"))

# track time
runtime <- tictoc::tic()

5.1 About

This script is adapted from Noah Greifer’s highly recommended blog post on “Subgroup Analysis After Propensity Score Matching Using R”.

For a more formal manuscript on subgroup analysis with propensity scores, see Green and Stuart.(Green and Stuart 2014)

5.2 Data generation

We again use the simulate_flaura() function to simulate a realistic oncology comparative effectiveness cohort analytic dataset.

# load example dataset with missing observations
data_miss <- simulate_flaura(
  n_total = 3500, 
  seed = 42, 
  include_id = FALSE, 
  imposeNA = TRUE,
  propNA = .13
  ) |> 
  # we simplify and assume a binary Asian/non-Asian race covariate where Asian is the reference group
  mutate(dem_race = ifelse(dem_race == "Asian", 0, 1))

5.3 Moderator covariate

In this example, we assume heterogeneous treatment effect by race and we aim to assess the average treatment effect among the treated for Asian (reference) and non-Asian patients. The effect size is time to all-cause mortality. In this dataset, race is encoded with a binary covariate with 0 = Asian and 1 = non-Asian.

table(data_miss$dem_race, useNA = "ifany")

   0    1 <NA> 
1854 1166  480 

Investigated subgroup effect in the FLAURA trial.

5.4 Multiple imputation

Both the imputation and propensity score step Multiple imputation using mice:

# impute data
data_imp <- futuremice(
  parallelseed = 42,
  n.core = parallel::detectCores()-1,
  data = data_miss,
  method = "rf",
  m = 10,
  print = FALSE
  )

5.5 Propensity score matching and weighting

Apply propensity score matching and weighting with replacement within in each imputed dataset.

# apply propensity score matching on mids object
ps_form <- as.formula(paste("treat ~", paste(covariates_for_ps, collapse = " + ")))
ps_form
treat ~ dem_age_index_cont + dem_sex_cont + c_smoking_history + 
    c_number_met_sites + c_hemoglobin_g_dl_cont + c_urea_nitrogen_mg_dl_cont + 
    c_platelets_10_9_l_cont + c_calcium_mg_dl_cont + c_glucose_mg_dl_cont + 
    c_lymphocyte_leukocyte_ratio_cont + c_alp_u_l_cont + c_protein_g_l_cont + 
    c_alt_u_l_cont + c_albumin_g_l_cont + c_bilirubin_mg_dl_cont + 
    c_chloride_mmol_l_cont + c_monocytes_10_9_l_cont + c_eosinophils_leukocytes_ratio_cont + 
    c_ldh_u_l_cont + c_hr_cont + c_sbp_cont + c_oxygen_cont + 
    c_ecog_cont + c_neutrophil_lymphocyte_ratio_cont + c_bmi_cont + 
    c_ast_alt_ratio_cont + c_stage_initial_dx_cont + dem_race + 
    dem_region + dem_ses + c_time_dx_to_index
match_within_strata <- function(i, 
                                imputed_data = NULL, 
                                ps_formula = NULL, 
                                filter_expr = NULL
                                ){
  
  stratum <- mice::complete(imputed_data, i) |> 
    dplyr::filter(eval(filter_expr))
    
  matched <- MatchIt::matchit(
    formula = ps_formula, 
    data = stratum,
    method = "nearest",
    caliper = 0.01,
    ratio = 1,
    replace = F
    ) |> 
    MatchIt::match.data()
  
  return(matched)
  
}

asian_matched <- lapply(
  X = 1:data_imp$m, 
  FUN = match_within_strata, 
  imputed_data = data_imp,
  ps_formula = ps_form,
  filter_expr = expr(dem_race == 0)
  )

non_asian_matched <- lapply(
  X = 1:data_imp$m, 
  FUN = match_within_strata, 
  imputed_data = data_imp,
  ps_formula = ps_form,
  filter_expr = expr(dem_race == 1)
  )

# combine the mth imputed and matched datasets
combine_list <- function(i, data_0 = NULL, data_1 = NULL){
  
  data_combined <- rbind(data_0[[i]], data_1[[i]])
  
  return(data_combined)
  
}

matched_all <- lapply(
  X = 1:data_imp$m, 
  FUN = combine_list, 
  data_0 = non_asian_matched,
  data_1 = asian_matched
  )
weight_within_strata <- function(i, 
                                 imputed_data = NULL, 
                                 ps_formula = NULL,
                                 filter_expr = NULL
                                 ){
  
  stratum <- mice::complete(imputed_data, i) |> 
    dplyr::filter(eval(filter_expr))
  
  weighted <- WeightIt::weightit(
    formula = ps_formula, 
    data = stratum,
    method = "glm",
    estimand = "ATT"
    )
  
  # trim extreme weights
  weighted <- trim(
    x = weighted, 
    at = .95, 
    lower = TRUE
    )
  
  weighted_data <- mice::complete(imputed_data, i) |> 
    dplyr::filter(eval(filter_expr)) |> 
    mutate(weights = weighted$weights)
  
  return(weighted_data)
  
}

asian_weighted <- lapply(
  X = 1:data_imp$m, 
  FUN = weight_within_strata, 
  imputed_data = data_imp,
  ps_formula = ps_form,
  filter_expr = expr(dem_race == 0)
  )

non_asian_weighted <- lapply(
  X = 1:data_imp$m, 
  FUN = weight_within_strata, 
  imputed_data = data_imp,
  ps_formula = ps_form,
  filter_expr = expr(dem_race == 1)
  )

weighted_all <- lapply(
  X = 1:data_imp$m, 
  FUN = combine_list, 
  data_0 = asian_weighted,
  data_1 = non_asian_weighted
  )

5.6 Outcome model comparisons

cox_fit_matching <- function(i){
  
  survival_fit <- survival::coxph(
    data = i,
    formula = Surv(fu_itt_months, death_itt) ~ treat*dem_race, 
    weights = weights, 
    cluster = subclass,
    robust = TRUE
    )
  
}
matched_all |> 
  lapply(FUN = cox_fit_matching) |> 
  mice::pool() |> 
  broom::tidy(exponentiate = TRUE, conf.int = TRUE) |> 
  dplyr::select(term, estimate, std.error, conf.low, conf.high)
            term  estimate  std.error  conf.low conf.high
1          treat 1.0185770 0.05516435 0.9136438 1.1355621
2       dem_race 1.0202716 0.06323001 0.9011041 1.1551986
3 treat:dem_race 0.5835454 0.10913246 0.4687213 0.7264982
cox_fit_weighting <- function(i){
  
  survival_fit <- survival::coxph(
    data = i,
    formula = Surv(fu_itt_months, death_itt) ~ treat*dem_race, 
    weights = weights, 
    robust = TRUE
    )
  
}
weighted_all |> 
  lapply(FUN = cox_fit_weighting) |> 
  mice::pool() |> 
  broom::tidy(exponentiate = TRUE, conf.int = TRUE) |> 
  dplyr::select(term, estimate, std.error, conf.low, conf.high)
            term  estimate  std.error  conf.low conf.high
1          treat 1.0151071 0.04590853 0.9275734 1.1109013
2       dem_race 1.0112025 0.05886144 0.9005986 1.1353899
3 treat:dem_race 0.5962633 0.07768865 0.5117708 0.6947054

5.7 References

5.8 Session info

Script runtime: 0.86 minutes.

pander::pander(subset(data.frame(sessioninfo::package_info()), attached==TRUE, c(package, loadedversion)))
  package loadedversion
dplyr dplyr 1.1.4
furrr furrr 0.3.1
future future 1.34.0
gtsummary gtsummary 2.0.1
here here 1.0.1
MatchThem MatchThem 1.2.1
Matrix Matrix 1.7-0
mice mice 3.16.0
parallelly parallelly 1.38.0
ranger ranger 0.16.0
survey survey 4.4-2
survival survival 3.5-8
pander::pander(sessionInfo())

R version 4.4.0 (2024-04-24)

Platform: x86_64-pc-linux-gnu

locale: LC_CTYPE=C.UTF-8, LC_NUMERIC=C, LC_TIME=C.UTF-8, LC_COLLATE=C.UTF-8, LC_MONETARY=C.UTF-8, LC_MESSAGES=C.UTF-8, LC_PAPER=C.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=C.UTF-8 and LC_IDENTIFICATION=C

attached base packages: grid, stats, graphics, grDevices, datasets, utils, methods and base

other attached packages: furrr(v.0.3.1), future(v.1.34.0), ranger(v.0.16.0), parallelly(v.1.38.0), gtsummary(v.2.0.1), here(v.1.0.1), survey(v.4.4-2), Matrix(v.1.7-0), MatchThem(v.1.2.1), mice(v.3.16.0), survival(v.3.5-8) and dplyr(v.1.1.4)

loaded via a namespace (and not attached): gtable(v.0.3.5), shape(v.1.4.6.1), xfun(v.0.47), ggplot2(v.3.5.1), htmlwidgets(v.1.6.4), MatchIt(v.4.5.5), lattice(v.0.22-6), simsurv(v.1.0.0), vctrs(v.0.6.5), tools(v.4.4.0), generics(v.0.1.3), parallel(v.4.4.0), tibble(v.3.2.1), fansi(v.1.0.6), pan(v.1.9), pkgconfig(v.2.0.3), jomo(v.2.7-6), assertthat(v.0.2.1), lifecycle(v.1.0.4), stringr(v.1.5.1), compiler(v.4.4.0), tictoc(v.1.2.1), munsell(v.0.5.1), mitools(v.2.4), codetools(v.0.2-20), htmltools(v.0.5.8.1), yaml(v.2.3.10), glmnet(v.4.1-8), pillar(v.1.9.0), nloptr(v.2.1.1), crayon(v.1.5.3), tidyr(v.1.3.1), MASS(v.7.3-60.2), sessioninfo(v.1.2.2), iterators(v.1.0.14), rpart(v.4.1.23), boot(v.1.3-30), foreach(v.1.5.2), mitml(v.0.4-5), nlme(v.3.1-164), locfit(v.1.5-9.10), WeightIt(v.1.3.0), tidyselect(v.1.2.1), digest(v.0.6.37), stringi(v.1.8.4), pander(v.0.6.5), listenv(v.0.9.1), purrr(v.1.0.2), splines(v.4.4.0), rprojroot(v.2.0.4), fastmap(v.1.2.0), colorspace(v.2.1-1), cli(v.3.6.3), magrittr(v.2.0.3), utf8(v.1.2.4), broom(v.1.0.6), withr(v.3.0.1), scales(v.1.3.0), backports(v.1.5.0), rmarkdown(v.2.28), globals(v.0.16.3), nnet(v.7.3-19), lme4(v.1.1-35.5), chk(v.0.9.2), evaluate(v.0.24.0), knitr(v.1.48), rlang(v.1.1.4), Rcpp(v.1.0.13), glue(v.1.7.0), DBI(v.1.2.3), renv(v.1.0.7), minqa(v.1.2.8), jsonlite(v.1.8.8) and R6(v.2.5.1)

pander::pander(options('repos'))
  • repos:

    REPO_NAME
    https://packagemanager.posit.co/cran/linux/noble/latest